import requests
import json
from pyquery import PyQuery as pq


def getPage(url, timeout=10):
    '''Fetch the page at the given URL and return its body text.

    Args:
        url: Address to request with HTTP GET.
        timeout: Seconds passed to ``requests.get`` as its timeout. Without
            one, a stalled server would block this call indefinitely.

    Returns:
        The response text when the status code is 200; ``None`` for any
        other status code or when the request itself fails.
    '''
    # NOTE(review): the Cookie value is hard-coded; presumably it was captured
    # from a logged-in browser session and must be refreshed once it expires.
    headers = {
        'Cookie': '__jda=122270672.1536629792638875992588.1536629793.1536629793.1536629793.1; __jdc=122270672; __jdv=122270672|direct|-|none|-|1536629792638; aud=09ab605c2548d1971c6722e654cfe411; aud_ver=2; avt=1; __jdu=1536629792638875992588; shshshfpb=0f938e272973e74255f0ae10583994be2bfc62cc5cf638ab35b9625371; PCSYCityID=1; shshshfpa=b54b3a8a-04ff-5462-cb16-f9edb05a3b9d-1536629792; user-key=ccb62238-9651-4557-b2bb-5eb34b8bf500; ipLoc-djd=1-72-4137-0; areaId=1; cn=4; shshshfp=89743d1dfb1f865482cd7a912c54b740; __jdb=122270672.13.1536629792638875992588|1.1536629793; asn=12; shshshsID=14eef02167502a023faa88824a275406_10_1536633303659',
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36',
    }
    try:
        res = requests.get(url, headers=headers, timeout=timeout)
    except requests.RequestException:
        # Only request-level failures (connection, timeout, bad URL, ...) are
        # treated as "no page". A bare ``except`` would also swallow
        # KeyboardInterrupt/SystemExit and hide programming errors.
        return None
    if res.status_code != 200:
        return None
    return res.text


def parsePage(content):
    '''Parse the page HTML and lazily yield one dict per cart item.

    Every ``div`` whose class attribute is exactly ``item-form`` is treated
    as one item. Each yielded dict has the keys:

    - ``title``: text of the link tagged as the cart SKU name
    - ``color``: ``title`` attribute read from the ``div.props-txt`` match
    - ``ban``: ``title`` attribute read from ``div.props-txt:eq(1)``
    - ``score``: text of ``div.p-price-new strong``
    - ``image``: ``src`` attribute read from the ``img`` match
    '''
    document = pq(content)
    for entry in document("div[class='item-form']").items():
        name_link = entry.find("a[clstag='clickcart|keycount|xincart|cart_sku_name']")
        first_prop = entry.find('div.props-txt')
        second_prop = entry.find('div.props-txt:eq(1)')
        price_node = entry.find("div.p-price-new strong")
        picture = entry.find('img')

        record = {
            'title': name_link.text(),
            'color': first_prop.attr('title'),
            'ban': second_prop.attr('title'),
            'score': price_node.text(),
            'image': picture.attr('src'),
        }
        yield record


def writeFile(content):
    '''Append ``content`` to ./jd.txt as a single line of JSON.

    The value is serialized with ``ensure_ascii=False`` so non-ASCII text is
    stored as-is (the file is opened as UTF-8), followed by a newline.
    '''
    with open('./jd.txt', mode='a', encoding='utf-8') as out:
        record = json.dumps(content, ensure_ascii=False)
        out.writelines((record, '\n'))


def main(url):
    '''Fetch ``url``, parse it, then print and save every parsed item.

    Does nothing when the fetch returns a falsy value (``None`` or an
    empty page).
    '''
    page = getPage(url)
    if not page:
        return
    for record in parsePage(page):
        print(record)
        writeFile(record)


if __name__ == "__main__":
    # Script entry point: scrape the JD shopping-cart page.
    url = 'https://cart.jd.com/cart.action'
    main(url=url)
